Skip to content

Commit c055b04

Browse files
Prewarm LLM cache (#6692)
* Prewarm LLM cache

* pre-warm LLM cache only if project defined

---------

Co-authored-by: Ryan Johnson <rjohnson@mozilla.com>
1 parent ed7c12d commit c055b04

File tree

5 files changed

+14
-9
lines changed

5 files changed

+14
-9
lines changed

.env-test

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,3 @@ REUSE_DB=0
 ENABLE_ADMIN=True
 SET_LOCALE_PATH=False
 SECURE_SSL_REDIRECT=False
-GOOGLE_APPLICATION_CREDENTIALS=creds
-GOOGLE_CLOUD_PROJECT=sumo-test

kitsune/llm/apps.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
 from django.apps import AppConfig
+from django.conf import settings


 class LLMConfig(AppConfig):
     name = "kitsune.llm"
     default_auto_field = "django.db.models.AutoField"
+
+    def ready(self):
+        from kitsune.llm.utils import get_llm
+
+        if settings.GOOGLE_CLOUD_PROJECT:
+            # pre-warm the LLM cache
+            get_llm()

kitsune/llm/questions/classifiers.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
 from kitsune.llm.utils import get_llm
 from kitsune.products.utils import get_taxonomy

-DEFAULT_LLM_MODEL = "gemini-2.5-flash-preview-04-17"
 HIGH_CONFIDENCE_THRESHOLD = 75
 LOW_CONFIDENCE_THRESHOLD = 60

@@ -26,7 +25,7 @@ def classify_question(question: "Question") -> dict[str, Any]:
     Analyze a question for spam and, if not spam or low confidence, classify the topic.
     Returns a dict with keys: action, spam_result, topic_result (optional).
     """
-    llm = get_llm(model_name=DEFAULT_LLM_MODEL)
+    llm = get_llm()

     product = question.product
     payload: dict[str, Any] = {

kitsune/llm/utils.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
-from functools import cache
+from functools import lru_cache

 from langchain.chat_models.base import BaseChatModel

+DEFAULT_LLM_MODEL = "gemini-2.5-flash-preview-04-17"

-@cache
+
+@lru_cache(maxsize=1)
 def get_llm(
-    model_name: str,
+    model_name: str = DEFAULT_LLM_MODEL,
     temperature: int = 1,
     max_tokens: int | None = None,
     max_retries: int = 2,

kitsune/settings.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,8 +1337,6 @@ def filter_exceptions(event, hint):

 USER_INACTIVITY_DAYS = config("USER_INACTIVITY_DAYS", default=1095, cast=int)

-if DEV:
-    GOOGLE_APPLICATION_CREDENTIALS = config("GOOGLE_APPLICATION_CREDENTIALS", default="")
 GOOGLE_CLOUD_PROJECT = config("GOOGLE_CLOUD_PROJECT", default="")


 # shell_plus conf

0 commit comments

Comments (0)